---
output: html_document
---

```{r setup, include=FALSE}

knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE, comment = "")

library(tidyverse)
library(nortest)
library(FSA)
library(tidymodels)
library(factoextra)
library(ggpubr)
library(ggrepel)
library(grid)
library(pheatmap)
library(plotly)
library(corrr)
library(reshape)
library(RColorBrewer)
library(usemodels)
library(ranger)
library(vip)
library(MASS)

```

# Dysregulation of hemoglobin and oxygenation in cerebral malaria {.tabset}

```{r data}

# read in data
df_hbox <- read.csv("Data/98 first visit records with cerebral oxygenation stats_trimmed.csv") %>% as_tibble()

# get rid of "Other"
df_hbox <- df_hbox %>% 
  filter(Status != "Other") %>% 
  mutate(Status = factor(Status, levels = c("HV", "UM", "CM")))

# identify duplicated rows (TM0003, TM2001)
df_hbox <- df_hbox %>% filter(duplicated(Subject.ID..NIAID.) == FALSE)

# Convert age to numeric
df_hbox <- df_hbox %>% 
  mutate(Age = gsub(" Years, ", "_", Age)) %>% 
  mutate(Age = gsub(" Months ", "", Age)) %>% 
  separate(Age, into = c("Years", "Months"), sep = "_") %>% 
  mutate(Years = as.numeric(Years)) %>% 
  mutate(Months = as.numeric(Months)/12) %>% 
  mutate(Age = Years + Months) %>% 
  dplyr::select(-c(Years, Months))

# Convert sex to factor
df_hbox <- df_hbox %>% mutate(Sex = as.factor(Sex))

# Trim for variables of interest (VOI)
df_hbox_trim <- df_hbox %>% 
  dplyr::select(c(Subject.ID..NIAID., Status, Glucose, Hematocrit, Lactate, Temperature,
                  Arginine.umol.L, Haptoglobin..mg.dl., Hemoglobin..uM., Whole.Blood.Nitrite, 
                  cerebral.hbdeox, cerebral.hbdeox.alpha, cerebral.hbox, cerebral.hbox.alpha,
                  cerebral.sat, cerebral.sat.alpha, cerebral.thc, cerebral.thc.alpha, cerebral.thc.norm.std)) %>% 
  
  dplyr::rename("subject_id" = "Subject.ID..NIAID.", "Arginine (umol/L)" = "Arginine.umol.L", "Haptoglobin (mg/dL)" = "Haptoglobin..mg.dl.", 
                "Hemoglobin (uM)" = "Hemoglobin..uM.", "WB_Nitrite" = "Whole.Blood.Nitrite", "DeoxyHb" = "cerebral.hbdeox", 
                "DeoxyHb_alpha" = "cerebral.hbdeox.alpha", "OxyHb" = "cerebral.hbox", "OxyHb_alpha" = "cerebral.hbox.alpha", 
                "O2sat" = "cerebral.sat", "O2sat_alpha" = "cerebral.sat.alpha", "THC" = "cerebral.thc", 
                "THC_alpha" = "cerebral.thc.alpha", "THC_sd" = "cerebral.thc.norm.std") 

# impute missing data with median by group
f_impute <- function(x, na.rm = TRUE) (replace(x, is.na(x), median(x, na.rm = na.rm)))

df_hbox_impute <- df_hbox_trim %>% 
  group_by(Status) %>% 
  mutate_at(3:ncol(.), f_impute) %>% 
  ungroup()

# pivot long
df_hbox_long <- df_hbox_trim %>% pivot_longer(cols = 3:ncol(.), names_to = "measurement", values_to = "value")

```

## Exploratory analysis 

#### Histogram to explore normality of data
```{r plot1, fig.width = 15, fig.height = 8}

df_hbox_long %>% 
  
  ggplot(aes(x = value)) +
  geom_histogram(bins = 20) +
  facet_grid(Status ~ measurement, scales = "free") +
  
  theme(strip.text.y = element_text(size = 15),
        strip.text.x = element_text(size = 11),
        axis.title.x = element_blank(),
        axis.title.y = element_text(size = 15),
        axis.text = element_text(size = 12),
        axis.text.x = element_text(angle = 45, hjust = 0.9))

```

#### Plots of all the variables by status
```{r plot2, fig.width = 15, fig.height = 15}

df_hbox_long %>% 
  
  ggplot(aes(x = Status, y = value)) +
  geom_violin(aes(color = Status)) +
  geom_jitter(position = position_jitter(0.2), shape = 1) +
  stat_summary(fun = "median", geom = "crossbar", aes(color = Status), size = 0.2, width = 0.5) +
  facet_wrap(~measurement, scales = "free_y") +
  
  theme_bw() +
  theme(legend.position = "none")  +
  
  theme(strip.text = element_text(size = 15),
        axis.title = element_text(size = 15),
        axis.title.y = element_blank(),
        axis.text = element_text(size = 15))

```

## PCA 

#### Can we cluster patients based on available variables? 

```{r pca_data}

# convert tibble to df
df_hbox_impute_df <- df_hbox_impute %>% dplyr::select(-c(subject_id, Status)) %>% as.data.frame(df_hbox_impute) 
rownames(df_hbox_impute_df) <- df_hbox_impute$subject_id

# compute PCA
my_pca <- prcomp(df_hbox_impute_df, scale = TRUE)

# define groups
groups <- as.factor(df_hbox_impute$Status)

# Eigenvalues
pca_eig_val <- get_eigenvalue(my_pca)

# Results for Variables
pca_res_var <- get_pca_var(my_pca)

# Results for individuals
pca_res_ind <- get_pca_ind(my_pca)

```

#### 2D PCA
```{r pca_2d, fig.width = 10, fig.height = 10}

df_pnts <- my_pca$x %>% as_tibble() %>% mutate(group = groups)

df_pnts %>% 
  
  ggplot(aes(x = PC1, y = PC2, color = group, fill = group)) +
  geom_point(size = 3) +
  stat_conf_ellipse(level = 0.95, npoint = 100, bary = TRUE, alpha = 0.1, geom = "polygon") +
  stat_mean(aes(shape = group), na.rm = FALSE) +
  geom_vline(xintercept = 0, lty = 2, color = "black") +
  geom_hline(yintercept = 0, lty = 2, color = "black") +
  
  xlab(paste0("PC1: ", round(pca_eig_val$variance.percent[1], 2), "% of variance")) +
  ylab(paste0("PC1: ", round(pca_eig_val$variance.percent[2], 2), "% of variance")) +

  theme_bw() 

```

#### Biplot
```{r pca_biplot, fig.width = 10, fig.height = 10}

df_vars <- my_pca$rotation %>% as_tibble() %>% mutate(variable = rownames(my_pca$rotation))

# plot

  # plot patients
  ggplot() +
  geom_point(data = df_pnts, mapping = aes(x = PC1, y = PC2, color = group), 
             size = 3) +
  stat_conf_ellipse(data = df_pnts, mapping = aes(x = PC1, y = PC2, color = group, fill = group),
                    level = 0.95, npoint = 100, bary = TRUE, alpha = 0.1, geom = "polygon") +
  stat_mean(data = df_pnts, mapping = aes(x = PC1, y = PC2, color = group, shape = group),
            na.rm = FALSE) +
  
  # plot variables
  geom_point(data = df_vars, aes(x = PC1*10, y = PC2*10)) +
  geom_text_repel(data = df_vars, aes(x = PC1*10, y = PC2*10, label = variable, size = 10), show.legend = FALSE) +
  geom_segment(data = df_vars, aes(x = 0, y = 0, xend = PC1*10, yend = PC2*10), arrow = arrow()) +

  # draw lines
  geom_vline(xintercept = 0, lty = 2, color = "black") +
  geom_hline(yintercept = 0, lty = 2, color = "black") +
  
  # design
  xlab(paste0("PC1: ", round(pca_eig_val$variance.percent[1], 2), "% of variance")) +
  ylab(paste0("PC2: ", round(pca_eig_val$variance.percent[2], 2), "% of variance")) +

  theme_bw() 

```

#### Determine biggest contributors to each PC
```{r pca_contrib}

df_pca_contrib <- pca_res_var$contrib %>% 
  as_tibble(rownames = "variable") %>% 
  arrange(-Dim.1, -Dim.2)

df_contrib <- tibble(Dim.1 = paste0(df_pca_contrib$variable, 
                                    sep = ": ", 
                                    round(df_pca_contrib$Dim.1, 2)),
                     Dim.2 = paste0(arrange(df_pca_contrib, -Dim.2)$variable, 
                                    sep = ": ", 
                                    round(arrange(df_pca_contrib, -Dim.2)$Dim.2, 2)),
                     Dim.3 = paste0(arrange(df_pca_contrib, -Dim.3)$variable, 
                                    sep = ": ", 
                                    round(arrange(df_pca_contrib, -Dim.3)$Dim.3, 2)))

df_contrib %>% as.data.frame()

```

#### 3D PCA

```{r pca_3d}

df_pca <- pca_res_ind$coord %>% 
  as_tibble(rownames = "subject") %>% 
  mutate(Status = factor(df_hbox_impute$Status, levels = c("UM", "HV", "CM")))

plot_ly(x = df_pca$Dim.1, y = df_pca$Dim.2, z = df_pca$Dim.3, 
        type = "scatter3d", mode = "markers", color = df_pca$Status)


```

## Variable correlations

```{r correlation, fig.heigh = 10, fig.width = 10}

m_cor <- df_hbox_impute %>% dplyr::select(-c(subject_id, Status)) %>% cor()

melt(m_cor) %>% 
  ggplot(aes(x = X1, y = X2, fill = value, label = round(value,2))) + 
  geom_tile() +
  scale_fill_gradient2(low = "#4575b4", high = "#d73027") +
  geom_text(size = 3) +
  
  xlab("") +
  ylab("") +
  theme(axis.text.x = element_text(angle = 45, hjust = 0.9))

```

## Random forest predictive modeling {.tabset} 

### All variables 

#### Build, tune, and finalize model
- Random forest model
- bootstrap resampling (w/ replacement)
- BoxCox transform all predictors, remove variables with zero variance, kNN imputation
- tune for optimal ROC AUC and accuracy

```{r model1}

# Data
hbox_df <- df_hbox_trim %>% dplyr::select(-subject_id)

# Build a model

  # split data set
  set.seed(123)
  hbox_split <- initial_split(hbox_df, strata = Status)
  hbox_train <- training(hbox_split)
  hbox_test <- testing(hbox_split)  
  
  # resamples (not a lot of data, need to use cross validation)
  set.seed(234)
  # vfold_cv(hbox_train, strata = Status) # test too small
  hbox_folds <- bootstraps(hbox_train, strata = Status)
  
  # Scaffolding for setting up common types of models
  # use_ranger(Status ~ ., data = hbox_df)
  
  # Copy usemodels code...
  
  # Create recipe
  ranger_recipe <- 
    recipe(formula = Status ~ ., data = hbox_df) %>% 
    
    # BoxCox transformation to normality
    step_BoxCox(all_predictors(), -all_outcomes()) %>% 
    
    # remove variables with non-zero variance
    step_nzv(all_predictors(), -all_outcomes()) %>% 
    
    # impute missing data using K-nearest neighbors
    step_knnimpute(all_predictors(), -all_outcomes()) 
  
  # Model specifications (set for tuning)
  ranger_spec <- 
    rand_forest(mtry = tune(), min_n = tune(), trees = 1000) %>% 
    set_mode("classification") %>% 
    set_engine("ranger") 
  
  # Create workflow
  ranger_workflow <- 
    workflow() %>% 
    add_recipe(ranger_recipe) %>% 
    add_model(ranger_spec) 

# Tune model

  # Initial tune on bootstraps
  set.seed(84374)
  ranger_tune <-
    tune_grid(ranger_workflow, 
              resamples = hbox_folds, 
              grid = 11)
  
# Finalize model

  # finalize workflow
  final_rf <- ranger_workflow %>% 
    finalize_workflow(select_best(ranger_tune, metric = "accuracy")) 
  
  final_rs_res <- final_rf %>% 
    fit_resamples(resamples = hbox_folds,
                  metrics = metric_set(roc_auc, accuracy, sens, spec),
                  control = control_resamples(save_pred = TRUE))
  
  hbox_fit <- last_fit(final_rf, hbox_split) # fitting to training data, evaluating on testing
  
```

#### Results

```{r model1_res}

# computed on test set
collect_metrics(hbox_fit) %>% dplyr::select(.metric, .estimate) %>% as.data.frame()

# collect predictions on test set
df_pred <- collect_predictions(hbox_fit) %>%
  dplyr::select(c(Status, .pred_HV, .pred_UM, .pred_CM, .pred_class))

df_pred %>% as.data.frame()
  
df_pred %>% filter(Status != .pred_class) %>% as.data.frame() # only got one wrong

```

```{r model1_plot_res}

conf_mat_resampled(final_rs_res) %>% 
  
  ggplot(aes(x = Truth, y = Freq, fill = Prediction, label = Freq)) + 
  geom_col(position = "dodge") +
  geom_text(position = position_dodge(width = 0.9)) +
  ggtitle("Confusion matrix") +
  theme_bw()

```

#### Determine variable importance
```{r model1_imp}

# new specification for ranger model
imp_spec <- ranger_spec %>% 
  finalize_model(select_best(ranger_tune, metric = "accuracy")) %>% 
  set_engine("ranger", importance = "permutation")

# plot importance 
workflow() %>% 
  add_recipe(ranger_recipe) %>% 
  add_model(imp_spec) %>% 
  fit(hbox_train) %>% 
  pull_workflow_fit() %>% 
  vip(aesthetics = list(alpha = 0.8, fill = "midnightblue")) +
  
  theme_bw() +
  ggtitle("Variable importance")

```

### NIRS variables only

#### Build, tune, and finalize model
Same code, but built model only using variables measured using NIRS

```{r model2}

# Data
hbox_df2 <- df_hbox_impute %>% 
  dplyr::select(c(Status, DeoxyHb, DeoxyHb_alpha, OxyHb, OxyHb_alpha, O2sat, O2sat_alpha, THC, THC_alpha, THC_sd))

# Build a model

  # split data set
  set.seed(456)
                hbox_split2 <- initial_split(hbox_df2, strata = Status)
                hbox_train2 <- training(hbox_split2)
                hbox_test2 <- testing(hbox_split2)  
  
  # resamples (not a lot of data, need to use cross validation)
  set.seed(789)
  hbox_folds2 <- bootstraps(hbox_train2, strata = Status)
  
  # Scaffolding for setting up common types of models
  # use_ranger(Status ~ ., data = hbox_df2)
  
  # Copy usemodels code...
  
  # Create recipe
  ranger_recipe2 <- 
    recipe(formula = Status ~ ., data = hbox_df2) %>% 
    
    # BoxCox transformation to normality
    step_BoxCox(all_predictors(), -all_outcomes()) %>% 
    
    # remove variables with non-zero variance
    step_nzv(all_predictors(), -all_outcomes()) %>% 
    
    # impute missing data using K-nearest neighbors
    step_knnimpute(all_predictors(), -all_outcomes()) 
  
  # Model specifications (set for tuning)
  ranger_spec2 <- 
    rand_forest(mtry = tune(), min_n = tune(), trees = 1000) %>% 
    set_mode("classification") %>% 
    set_engine("ranger") 
  
  # Create workflow
  ranger_workflow2 <- 
    workflow() %>% 
    add_recipe(ranger_recipe2) %>% 
    add_model(ranger_spec2) 
  
# Tune model

  # Initial tune on bootstraps
  set.seed(101112)
  ranger_tune2 <-
    tune_grid(ranger_workflow2, 
              resamples = hbox_folds2, 
              grid = 11)
  
# Finalize model

  # finalize workflow
  final_rf2 <- ranger_workflow2 %>% 
    finalize_workflow(select_best(ranger_tune2, metric = "accuracy")) 
  
  
  final_rs_res2 <- final_rf2 %>% 
    fit_resamples(resamples = hbox_folds2,
                  metrics = metric_set(roc_auc, accuracy, sens, spec),
                  control = control_resamples(save_pred = TRUE))
  
  hbox_fit2 <- last_fit(final_rf2, hbox_split2) # fitting to training data, evaluating on testing
  
```

#### Results

```{r model2_res}

# computed on test set
collect_metrics(final_rs_res2) %>% dplyr::select(.metric, mean, std_err, n) %>% as.data.frame()

# collect predictions on test set
df_pred2 <- collect_predictions(hbox_fit2) %>%
  dplyr::select(c(Status, .pred_HV, .pred_UM, .pred_CM, .pred_class))

df_pred2 %>% as.data.frame()
  
df_pred2 %>% filter(Status != .pred_class) %>% as.data.frame() # only got one wrong

```

```{r model2_plot_res}

conf_mat_resampled(final_rs_res2) %>% 
  
  ggplot(aes(x = Truth, y = Freq, fill = Prediction, label = Freq)) + 
  geom_col(position = "dodge") +
  geom_text(position = position_dodge(width = 1)) +
  ggtitle("Confusion matrix") +
  theme_bw()

```

#### Determine variable importance
```{r model2_imp}

# new specification for ranger model
imp_spec2 <- ranger_spec2 %>% 
  finalize_model(select_best(ranger_tune2, metric = "accuracy")) %>% 
  set_engine("ranger", importance = "permutation")

# plot importance 
workflow() %>% 
  add_recipe(ranger_recipe2) %>% 
  add_model(imp_spec2) %>% 
  fit(hbox_train2) %>% 
  pull_workflow_fit() %>% 
  vip(aesthetics = list(alpha = 0.8, fill = "midnightblue")) +

  theme_bw() +
  ggtitle("Variable importance")

```


### Other variables 

#### Build, tune, and finalize model
Same code, but built model only using other variables (Arginine, lactate, hematocrit, temperature, whole blood nitrite, haptoglobin, glucose)

```{r model3}

# Data
hbox_df3 <- df_hbox_impute %>% dplyr::select(-c(subject_id, DeoxyHb, DeoxyHb_alpha, OxyHb, OxyHb_alpha, O2sat, O2sat_alpha, THC, THC_alpha, THC_sd))

# Build a model

  # split data set
  set.seed(1234)
  hbox_split3 <- initial_split(hbox_df3, strata = Status)
  hbox_train3 <- training(hbox_split3)
  hbox_test3 <- testing(hbox_split3)  
  
  # resamples (not a lot of data, need to use cross validation)
  set.seed(2345)
  hbox_folds3 <- bootstraps(hbox_train3, strata = Status)
  
  # Copy usemodels code...
  
  # Create recipe
  ranger_recipe3 <- 
    recipe(formula = Status ~ ., data = hbox_df3) %>% 
    
    # BoxCox transformation to normality
    step_BoxCox(all_predictors(), -all_outcomes()) %>% 
    
    # remove variables with non-zero variance
    step_nzv(all_predictors(), -all_outcomes()) %>% 
    
    # impute missing data using K-nearest neighbors
    step_knnimpute(all_predictors(), -all_outcomes()) 
  
  # Model specifications (set for tuning)
  ranger_spec3 <- 
    rand_forest(mtry = tune(), min_n = tune(), trees = 1000) %>% 
    set_mode("classification") %>% 
    set_engine("ranger") 
  
  # Create workflow
  ranger_workflow3 <- 
    workflow() %>% 
    add_recipe(ranger_recipe3) %>% 
    add_model(ranger_spec3) 

# Tune model

  # Initial tune on bootstraps
  set.seed(8434)
  ranger_tune3 <-
    tune_grid(ranger_workflow3, 
              resamples = hbox_folds3, 
              grid = 11)
  
# Finalize model

  # finalize workflow
  final_rf3 <- ranger_workflow3 %>% 
    finalize_workflow(select_best(ranger_tune3, metric = "accuracy")) 
  
  
  final_rs_res3 <- final_rf3 %>% 
    fit_resamples(resamples = hbox_folds3,
                  metrics = metric_set(roc_auc, accuracy, sens, spec),
                  control = control_resamples(save_pred = TRUE))
  
  hbox_fit3 <- last_fit(final_rf3, hbox_split3) # fitting to training data, evaluating on testing
  
```

#### Results

```{r model3_res}

# computed on test set
collect_metrics(final_rs_res3) %>% dplyr::select(.metric, mean, std_err, n) %>% as.data.frame()

# collect predictions on test set
df_pred3 <- collect_predictions(hbox_fit3) %>%
  dplyr::select(c(Status, .pred_HV, .pred_UM, .pred_CM, .pred_class))

df_pred3 %>% as.data.frame()
  
df_pred3 %>% filter(Status != .pred_class) %>% as.data.frame() 

```

```{r model3_plot_res}

conf_mat_resampled(final_rs_res3) %>% 
  
  ggplot(aes(x = Truth, y = Freq, fill = Prediction, label = Freq)) + 
  geom_col(position = "dodge") +
  geom_text(position = position_dodge(width = 1)) +
  ggtitle("Confusion matrix") +
  theme_bw()

```

#### Determine variable importance
```{r model3_imp}

# new specification for ranger model
imp_spec3 <- ranger_spec3 %>% 
  finalize_model(select_best(ranger_tune3, metric = "accuracy")) %>% 
  set_engine("ranger", importance = "permutation")

# plot importance 
workflow() %>% 
  add_recipe(ranger_recipe3) %>% 
  add_model(imp_spec3) %>% 
  fit(hbox_train3) %>% 
  pull_workflow_fit() %>% 
  vip(aesthetics = list(alpha = 0.8, fill = "midnightblue")) +

  theme_bw() +
  ggtitle("Variable importance")

```


## One way ANOVAs {.tabset}

### Influence healthy vs malaria

#### Arginine
```{r arginine_show, echo = TRUE, eval = FALSE}

# create model for BoxCox transformation
arg_model <- glm(`Arginine (umol/L)` ~ Status, data = df_hbox_impute)
boxcox(arg_model, lambda = seq(-0.5, 1, by = 0.1), plotit = TRUE)

# BoxCox transform
df_arg <- df_hbox_impute %>% mutate(arg_transform = (`Arginine (umol/L)`^-0.25 - 1)/-0.25)

# model on transformed data
glm(arg_transform ~ Status, data = df_arg) %>% summary()

```

```{r arg_model}

glm(`Arginine (umol/L)` ~ Status, data = df_hbox_impute) %>% summary()

aov(`Arginine (umol/L)` ~ Status, data = df_hbox_impute) %>% TukeyHSD() %>% 
  .$Status %>% 
  as.data.frame() %>% 
  rownames_to_column("Status") %>% 
  dplyr::select(Status, `p adj`)


```

```{r arg_plot}

df_hbox_long %>% filter(measurement == "Arginine (umol/L)") %>% 
  
  ggplot(aes(x = Status, y = value)) +
  geom_violin(aes(color = Status)) +
  geom_jitter(position = position_jitter(0.2), shape = 1) +
  stat_summary(fun = "median", geom = "crossbar", aes(color = Status), size = 0.2, width = 0.5) +

  theme_bw() +
  theme(legend.position = "none")  +
  ggtitle("Arginine (umol/L)") +
  
  theme(strip.text = element_text(size = 15),
        axis.title = element_text(size = 15),
        axis.title.y = element_blank(),
        axis.text = element_text(size = 15))

```

#### Temperature
```{r temp}

glm(Temperature ~ Status, data = df_hbox_impute) %>% summary()

aov(Temperature ~ Status, data = df_hbox_impute) %>% TukeyHSD() %>% 
  .$Status %>% 
  as.data.frame() %>% 
  rownames_to_column("Status") %>% 
  dplyr::select(Status, `p adj`)

```

```{r temp_plot}

df_hbox_long %>% filter(measurement == "Temperature") %>% 
  
  ggplot(aes(x = Status, y = value)) +
  geom_violin(aes(color = Status)) +
  geom_jitter(position = position_jitter(0.2), shape = 1) +
  stat_summary(fun = "median", geom = "crossbar", aes(color = Status), size = 0.2, width = 0.5) +

  theme_bw() +
  theme(legend.position = "none")  +
  ggtitle("Temperature") +
  
  theme(strip.text = element_text(size = 15),
        axis.title = element_text(size = 15),
        axis.title.y = element_blank(),
        axis.text = element_text(size = 15))

```

#### Hematocrit
```{r hct}

glm(Hematocrit ~ Status, data = df_hbox_impute) %>% summary()

aov(Temperature ~ Status, data = df_hbox_impute) %>% TukeyHSD() %>% 
  .$Status %>% 
  as.data.frame() %>% 
  rownames_to_column("Status") %>% 
  dplyr::select(Status, `p adj`)

```

```{r hct_plot}

df_hbox_long %>% filter(measurement == "Hematocrit") %>% 
  
  ggplot(aes(x = Status, y = value)) +
  geom_violin(aes(color = Status)) +
  geom_jitter(position = position_jitter(0.2), shape = 1) +
  stat_summary(fun = "median", geom = "crossbar", aes(color = Status), size = 0.2, width = 0.5) +

  theme_bw() +
  theme(legend.position = "none")  +
  ggtitle("Hematocrit") +
  
  theme(strip.text = element_text(size = 15),
        axis.title = element_text(size = 15),
        axis.title.y = element_blank(),
        axis.text = element_text(size = 15))

```

### Influence CM vs UM

#### O2sat_alpha
```{r O2sat_alpha}

glm(O2sat_alpha ~ Status, data = df_hbox_impute) %>% summary()

aov(O2sat_alpha ~ Status, data = df_hbox_impute) %>% TukeyHSD() %>% 
  .$Status %>% 
  as.data.frame() %>% 
  rownames_to_column("Status") %>% 
  dplyr::select(Status, `p adj`)

```

```{r O2sat_alpha_plot}

df_hbox_long %>% filter(measurement == "O2sat_alpha") %>% 
  
  ggplot(aes(x = Status, y = value)) +
  geom_violin(aes(color = Status)) +
  geom_jitter(position = position_jitter(0.2), shape = 1) +
  stat_summary(fun = "median", geom = "crossbar", aes(color = Status), size = 0.2, width = 0.5) +

  theme_bw() +
  theme(legend.position = "none")  +
  ggtitle("O2sat_alpha") +
  
  theme(strip.text = element_text(size = 15),
        axis.title = element_text(size = 15),
        axis.title.y = element_blank(),
        axis.text = element_text(size = 15))

```

#### THC_alpha
```{r THC_alpha}

glm(THC_alpha ~ Status, data = df_hbox_impute) %>% summary()

aov(THC_alpha ~ Status, data = df_hbox_impute) %>% TukeyHSD() %>% 
  .$Status %>% 
  as.data.frame() %>% 
  rownames_to_column("Status") %>% 
  dplyr::select(Status, `p adj`)

```

```{r THC_alpha_plot}

df_hbox_long %>% filter(measurement == "THC_alpha") %>% 
  
  ggplot(aes(x = Status, y = value)) +
  geom_violin(aes(color = Status)) +
  geom_jitter(position = position_jitter(0.2), shape = 1) +
  stat_summary(fun = "median", geom = "crossbar", aes(color = Status), size = 0.2, width = 0.5) +

  theme_bw() +
  theme(legend.position = "none")  +
  ggtitle("THC_alpha") +
  
  theme(strip.text = element_text(size = 15),
        axis.title = element_text(size = 15),
        axis.title.y = element_blank(),
        axis.text = element_text(size = 15))

```

#### THC
```{r THC}

glm(THC ~ Status, data = df_hbox_impute) %>% summary()

aov(THC ~ Status, data = df_hbox_impute) %>% TukeyHSD() %>% 
  .$Status %>% 
  as.data.frame() %>% 
  rownames_to_column("Status") %>% 
  dplyr::select(Status, `p adj`)

```

```{r THC_plot}

df_hbox_long %>% filter(measurement == "THC") %>% 
  
  ggplot(aes(x = Status, y = value)) +
  geom_violin(aes(color = Status)) +
  geom_jitter(position = position_jitter(0.2), shape = 1) +
  stat_summary(fun = "median", geom = "crossbar", aes(color = Status), size = 0.2, width = 0.5) +

  theme_bw() +
  theme(legend.position = "none")  +
  ggtitle("THC") +
  
  theme(strip.text = element_text(size = 15),
        axis.title = element_text(size = 15),
        axis.title.y = element_blank(),
        axis.text = element_text(size = 15))

```

#### OxyHb
```{r OxyHb}

glm(OxyHb ~ Status, data = df_hbox_impute) %>% summary()

aov(OxyHb ~ Status, data = df_hbox_impute) %>% TukeyHSD() %>% 
  .$Status %>% 
  as.data.frame() %>% 
  rownames_to_column("Status") %>% 
  dplyr::select(Status, `p adj`)

```

```{r OxyHb_plot}

df_hbox_long %>% filter(measurement == "OxyHb") %>% 
  
  ggplot(aes(x = Status, y = value)) +
  geom_violin(aes(color = Status)) +
  geom_jitter(position = position_jitter(0.2), shape = 1) +
  stat_summary(fun = "median", geom = "crossbar", aes(color = Status), size = 0.2, width = 0.5) +

  theme_bw() +
  theme(legend.position = "none")  +
  ggtitle("OxyHb") +
  
  theme(strip.text = element_text(size = 15),
        axis.title = element_text(size = 15),
        axis.title.y = element_blank(),
        axis.text = element_text(size = 15))

```

#### Lactate
```{r Lactate}

glm(Lactate ~ Status, data = df_hbox_impute) %>% summary()

aov(Lactate ~ Status, data = df_hbox_impute) %>% TukeyHSD() %>% 
  .$Status %>% 
  as.data.frame() %>% 
  rownames_to_column("Status") %>% 
  dplyr::select(Status, `p adj`)

```

```{r Lactate_plot}

df_hbox_long %>% filter(measurement == "Lactate") %>% 
  
  ggplot(aes(x = Status, y = value)) +
  geom_violin(aes(color = Status)) +
  geom_jitter(position = position_jitter(0.2), shape = 1) +
  stat_summary(fun = "median", geom = "crossbar", aes(color = Status), size = 0.2, width = 0.5) +

  theme_bw() +
  theme(legend.position = "none")  +
  ggtitle("Lactate") +
  
  theme(strip.text = element_text(size = 15),
        axis.title = element_text(size = 15),
        axis.title.y = element_blank(),
        axis.text = element_text(size = 15))

```

#### ?OxyHb_alpha
```{r OxyHb_alpha}

glm(OxyHb_alpha ~ Status, data = df_hbox_impute) %>% summary()

aov(OxyHb_alpha ~ Status, data = df_hbox_impute) %>% TukeyHSD() %>% 
  .$Status %>% 
  as.data.frame() %>% 
  rownames_to_column("Status") %>% 
  dplyr::select(Status, `p adj`)

```

```{r OxyHb_alpha_plot}

df_hbox_long %>% filter(measurement == "OxyHb_alpha") %>% 
  
  ggplot(aes(x = Status, y = value)) +
  geom_violin(aes(color = Status)) +
  geom_jitter(position = position_jitter(0.2), shape = 1) +
  stat_summary(fun = "median", geom = "crossbar", aes(color = Status), size = 0.2, width = 0.5) +

  theme_bw() +
  theme(legend.position = "none")  +
  ggtitle("OxyHb_alpha") +
  
  theme(strip.text = element_text(size = 15),
        axis.title = element_text(size = 15),
        axis.title.y = element_blank(),
        axis.text = element_text(size = 15))

```




